/*
    Copyright 2006-2011 Patrik Jonsson, sunrise@familjenjonsson.org

    This file is part of Sunrise.

    Sunrise is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 3 of the License, or
    (at your option) any later version.

    Sunrise is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with Sunrise.  If not, see <http://www.gnu.org/licenses/>.

*/

/// \file
/// Definition of the full_sed_emergence class.

#include "config.h"
#include "full_sed_emergence.h"
#include "emergence-fits.h"
#include "CCfits/CCfits"
#include <sstream> 
#include "biniostream.h"
#include "fits-utilities.h"
#include "boost/thread.hpp"
#include "boost/shared_ptr.hpp"
#include <sys/shm.h>
#include "hpm.h"
#include "mcrx-debug.h"
#include "units.h"

using namespace blitz;
using namespace std;
using namespace CCfits;

mcrx::full_sed_emergence::full_sed_emergence (const CCfits::FITS& file) :
  emergence<array_1> (file) 
{};

/** Writes the camera images to a FITS file.  The images are written
    as data cubes in HDU's CAMERAi_NONSCATTER in the specified
    file. Because these three-dimensional data cubes are very large,
    the HDU's are written using the cfitsio tile-compression format.
    This writing takes a long time, so to speed it up, it's done for
    all cameras in parallel through the use of multiple threads and an
    independent process (cmprauxfits.cc). NOTE that writing the images
    also frees the image arrays, to save memory. (This is probably
    dumb.) */
void mcrx::full_sed_emergence::write_images (CCfits::FITS& file,
					     T_float normalization,
					     const T_unit_map& units,
					     const std::string& hduname,
					     bool do_compress,
					     bool parallel_compress) 
{
  using namespace CCfits;
  using namespace blitz;
  assert(normalization > 0);
  
  if(do_compress && parallel_compress)
    parallel_write_images (file, normalization, units, hduname);
  else
    serial_write_images (file, normalization, units, hduname, do_compress);
}

/** Writes images to a FITS file.  If do_compress is set, the images
    are compressed using the cfitsio tile compression algorithm. After
    the completion of this file, the camera arrays are set to zero, so
    any analysis of the info needs to happen before calling this
    function. (This was done to minimize memory usage.) */
void mcrx::full_sed_emergence::serial_write_images (CCfits::FITS& file,
						    T_float normalization,
						    const T_unit_map& units,
						    const std::string& hduname,
						    bool do_compress)
{      
  for (int c=0; c< n_cameras(); ++c) {
    T_camera& cam = get_camera (c);
    array_3& image = cam.get_image ();

    // get the total image on master task
    mpi_sum_arrays(image);
    if(!is_mpi_master())
      continue;

    assert  (image.size() > 0);
    ASSERT_ALL (image < blitz::huge (T_float ()));
    ASSERT_ALL (image >= 0);

    std::ostringstream ost;
    ost << "CAMERA" << c << hduname;
    
    cout << ost.str()<< endl;

    // check if HDU already exists
    CCfits::ExtHDU* hdu;    
    try {
      hdu = &open_HDU (file, ost.str ());
      // it would probably be good to check that image dimensions are
      // consistent
    }
    catch (CCfits::FITS::NoSuchHDU&) {
      std::vector<long> naxes;
      naxes.push_back(image.extent(firstDim ) );
      naxes.push_back(image.extent(secondDim ) );
      naxes.push_back(image.extent(thirdDim ) );
      if (do_compress) {
       // use the cfitsio tile compression on these images, put each
       // wavelength slice in one tile
       int status;
       std::vector<long> tile_size (naxes);
       tile_size [2] = 1 ;
       file.setCompressionType(GZIP_1);
       file.setTileDimensions(tile_size);
      } 
      hdu = file.addImage (ost.str(), FLOAT_IMG, naxes);
      file.setCompressionType(0);

      hdu->writeComment("This HDU contains the full SED camera image, without any scattering.  The third dimension of the data cube is wavelength, see HDU LAMBDA for what the wavelengths are.");
    }
    
    // write wcs keywords
    cam.write_wcs(*hdu, units);

    const T_float unitconv = write_units(*hdu, cam, units, normalization);
    DEBUG(1,cout << "Raw image min/max: " << min(image) << ' ' << max(image) << endl;)
    cout << "Normalizing image"<< endl;
    // We don't need to use a temporary image, we can simply normalize
    // image and write it. cfitsio will convert to float.
    image *=(normalization*unitconv);
    DEBUG(1,cout << "Normalized image min/max: " << min(image) << ' ' << max(image) << endl;)
      
    if (do_compress)
      cout << "Compressing image"<<endl;
    else 
      cout << "Writing image"<<endl;

    // this writes in slices, so saves memory.
    try {
      write(*hdu, image);
    }
    catch (...) {
      cerr << "Error writing image -- probably image size mismatch. Try deleting the file." << endl;
      throw;
    }

    DEBUG(3,array_1 integrated(sum(sum(image(tensor::k, tensor::j, tensor::i),tensor::k), tensor::j));cout << "Saving Ratio " << integrated(535)/integrated(532) << ' '<< integrated(615)/integrated(614) << endl;);

    // NOTE that camera image array is freed here!
    image.free();

    // and update checksum
    hdu->writeChecksum();
  }
  
}


/** Writes images to a FITS file by compressing them in parallel using
    the cmprauxfits executable.  Note that there is no option here
    whether or not to compress, because doing it in parallel only
    makes sense if we are compressing.  */ 
void mcrx::full_sed_emergence::parallel_write_images (CCfits::FITS& file,
						      T_float normalization,
						      const T_unit_map& units,
						      const std::string& hduname)
{      
  boost::thread_group threads;
  vector<boost::shared_ptr<thread> > thread_objects;

  for (int c=0; c< n_cameras(); ++c) {
    T_camera& cam = get_camera (c);
    array_3& image = cam.get_image ();

    // get the total image on master task
    mpi_sum_arrays(image);
    if(!is_mpi_master())
      continue;

    assert  (image.size() > 0);
    assert (all (image < blitz::huge (T_float ())));
    std::ostringstream ost;
    ost << "CAMERA" << c << hduname;
    cout << ost.str()<< endl;

    std::ostringstream ost2;
    ost2 << file.name() << ".full_sed_emergence_tempfile" << c;
    const string dump_name = ost2.str ();
    const string temp_name = ost2.str () + ".fits";
    CCfits::FITS tempfile (temp_name, Write);
    
    // check if HDU already exists
    ExtHDU* hdu;    
    std::vector<long> naxes;
    naxes.push_back(image.extent(firstDim ) );
    naxes.push_back(image.extent(secondDim ) );
    naxes.push_back(image.extent(thirdDim ) );

    // use the cfitsio tile compression on these images, put each
    // wavelength slice in one tile
    int status;
    std::vector<long> tile_size (naxes);
    tile_size [2] = 1 ;
    file.setCompressionType(GZIP_1);
    file.setTileDimensions(tile_size);
    hdu = tempfile.addImage (ost.str(), FLOAT_IMG, naxes);
    file.setCompressionType(0);

    hdu->writeComment("This HDU contains the full SED camera image, without any scattering.  The third dimension of the data cube is wavelength, see HDU LAMBDA for what the wavelengths are.");
    
    // write wcs keywords
    cam.write_wcs(*hdu, units);

    // write units
    const T_float unitconv = write_units(*hdu, cam, units, normalization);
    thread_objects.push_back(boost::shared_ptr<thread> 
			     (new thread (c, temp_name, image,
					  normalization*unitconv )));
  } // for camera

  if(!is_mpi_master())
    return;
  
  // now spawn threads
  cout << "Spawning compression threads" << endl;
  for (int i = 0; i < thread_objects.size(); ++i)
    threads.create_thread(*thread_objects [i] );

  threads.join_all();

  cout << "Copying temporary files into output file" << endl;
  //hpm::hpmstart (35, "Copying temporary files");
  // and copy the written images into our file
  for (int i = 0; i < thread_objects.size(); ++i) {
    if (thread_objects[i]->status !=0) {
      // The compression failed one way or another
      cerr << "FATAL: Compression thread " << i << " failed." << endl;
      throw -1;
    }

    const int camera_number = thread_objects [i]->camera_number;
    {
      const array_3& image = get_camera (camera_number).get_image();
      std::vector<long> tile_size (3);
      tile_size [0] =image.extent(firstDim );
      tile_size [1] = image.extent(secondDim );
      tile_size [2] = 1 ;
      int status = 0;
      file.setCompressionType(GZIP_1);
      file.setTileDimensions(tile_size);
      CCfits::FITS tempfile (thread_objects [i]->file_name, CCfits::Read);
      file.copy(tempfile.extension(1));
      file.setCompressionType(0);
    }

    unlink((thread_objects [i]->file_name).c_str());
  }
  //hpm::hpmstop (35);
}

/** The thread start function.  This function executes the auxiliary
    compression program in cmprauxfits.cc. This is necessary since
    cfitsio is not thread safe.  The image data is communicated
    through a shared memory segment. */
void mcrx::full_sed_emergence::thread::operator () ()
{
  // Make shared memory segment and copy image data
  //hpm::hpmtstart(33," Normalized");
  
  key_t key = ftok (file_name.c_str(), 'A');
  const int shared_ID = shmget (key, image.size()*sizeof (float),
				0644 | IPC_CREAT);
  if (shared_ID == - 1) {
    cerr << "Fatal: Could not create shared memory segment" << endl;
    status=1;
    return;
  }

  float* shared_data = reinterpret_cast<float*> (shmat (shared_ID, 0,0));
  assert (shared_data);
  
  cout << "Normalizing image\n";
    
  blitz::Array<float, 3> temp_image (shared_data, image.shape(),
				     neverDeleteData,
				     ColumnMajorArray<3> (contiguousData));
  temp_image =normalization*image;
  // Free the image array to minimize memory usage
  image.free();
  //hpm::hpmtstop (33);
    
  //hpm::hpmtstart (34, "cmprauxfits");

  system (("exec cmprauxfits " + file_name).c_str ());

  // detach from shared memory
  shmdt (shared_data);
  shmctl (shared_ID, IPC_RMID,0);

  //hpm::hpmtstop (34);

  cout << file_name << " done\n";
}


/** Convert image units. The image units are per area in the length
    units of the camera distance units, which needs to be converted to
    m. We also need to set the solid angle normalization. The camera
    response function was used when adding rays. This response
    function is dA/dOmega for the position on the image plane, where A
    is the area on the normalized image plane. What remains is to
    divide out the area of one pixel on the normalized image plane,
    which is just 1/(nx*ny).  */
mcrx::T_float
mcrx::full_sed_emergence::write_units (CCfits::ExtHDU& hdu,
				       T_camera& c,
				       const T_unit_map& units,
				       T_float normalization)
{
  const T_float to_m = units::convert(units.get("length"),"m");
  const T_float sb_factr = 1./(to_m*to_m*c.pixel_normalized_area());
  const string sb_unit = units.get("L_lambda") + "/m^2/sr";
  const string sb_factr_unit = "m^2/(" + units.get("length") + "^2)";

  assert(sb_factr > 0);

  if(is_mpi_master()) {
    hdu.addKey("imunit", sb_unit,
	       "Pixel quantity is surface brightness");
    hdu.addKey("unitconv", sb_factr,
	       "[" + sb_factr_unit + "] Internal units to surface brightness");
    hdu.addKey("normalization", normalization,
	       "Image normalization (internal usage)");
  }

  return sb_factr;
}


/** Loads images from a FITS file.  The camera image arrays are loaded
    from the CAMERAi-NONSCATTER HDU's. */
void mcrx::full_sed_emergence::load_images (const CCfits::FITS& file,
					    const std::string& hduname)
{
  cout << "Loading image data from file" << endl;

  for (int c=0; c< n_cameras(); ++c) {
    T_camera& cam = get_camera (c);

    std::ostringstream ost;
    ost << "CAMERA" << c << hduname;
    cout << "  " << ost.str()<< endl;
    const CCfits::ExtHDU& hdu = open_HDU (file, ost.str() );
    assert (hdu.axis(0) == cam.xsize ());
    assert (hdu.axis(1 ) == cam.ysize ());

    array_3& image = cam.get_image ();

    // resize the image because it's not necessarily allocated at this point
    image.resize(hdu.axis(0 ),hdu.axis(1),hdu.axis(2) );

    // read image on task 0, zero it out on the other tasks.
    if(is_mpi_master()) 
      read (hdu, image);
    else
      image=0;

    // check for a normalization keyword
    float normalization = 1 ;
    float sb_factr = 1;
    try {
      const_cast<CCfits::ExtHDU&> (hdu).readKey (string ("normalization"),
						 normalization);
    }
    catch (...) {}
    try {
      const_cast<CCfits::ExtHDU&> (hdu).readKey (string ("sb_factr"),
						sb_factr);
    }
    catch (...) {}
    if (normalization*sb_factr != 1)
      image*= 1/(normalization*sb_factr) ;
  }
}

/** Dump the image arrays to a temporary dump file.  The main purpose
    here is to be fast, since we have gotten a termination signal. */
void mcrx::full_sed_emergence::write_dump (binofstream& file) const
{
  for (int c=0; c< n_cameras(); ++c) {
    const array_3& image = get_camera (c).get_image();
    // We need to save the image size as well because it's not
    // allocated until the first ray is shot
    TinyVector<int, 3> extent = image.shape();
    file << extent [0]
	 << extent [1]
	 << extent [2];
    DEBUG(1,cout << "Dumping image data for camera " << c << ", " << extent << " bytes" << endl;);
    file.write(reinterpret_cast<const char*> (image.dataFirst()),
	       image.storageSize()*sizeof (array_3::T_numtype) );
  } 
}

/** Load the image arrays from a temporary dump file. */
bool mcrx::full_sed_emergence::load_dump (binifstream& file) 
{
  for (int c=0; c< n_cameras(); ++c) {
    T_camera& cam = get_camera (c);

    array_3& image = cam.get_image ();
    TinyVector< int, 3> size;
    file >> size [0]
	 >>  size [1]
	 >>  size [2];
    // it's possible we will load empty images here if we dumped
    // before any rays had been added.
    assert ((size [0] == 0) ||(size [0] == cam.xsize ()));
    assert ((size [1] == 0) ||(size [1] == cam.ysize ()));
    image.resize(size );
    DEBUG(1,cout << "Loading image data for camera " << c << ", " << size << " pixels" << endl;);
    file.read (reinterpret_cast<char*> (image.dataFirst()),
	       image.storageSize()*sizeof (array_3::T_numtype) );
  }

  return file.good();
}




